<?php

include("config.php");

/**
 * Recursively crawl a page: follow its links, then record its metadata.
 *
 * The page at $url is fetched and parsed as HTML. Every followable <a href>
 * (up to $GLOBALS['maxpagecrawl'] per page) is crawled first with
 * $depth - 1; afterwards the page's own URL, <title>, meta description and
 * meta keywords are echoed and appended to $filename, unless an entry for
 * exactly this URL is already present in that file.
 *
 * URLs already visited during this process are remembered in a static
 * array and are never fetched twice.
 *
 * NOTE(review): the default for $depth can never take effect because a
 * required parameter follows it (PHP 8 reports such signatures as
 * deprecated). The signature is left untouched so existing callers keep
 * working.
 *
 * @param string $url      Address of the page to fetch.
 * @param int    $depth    Remaining recursion depth; the crawl stops when it reaches 0.
 * @param string $filename File the "URL/Title/Description/Keywords" records are appended to.
 *
 * @return int|null 1 if the page could not be fetched,
 *                  2 if the page has no usable title or an empty description,
 *                  3 if the URL was already visited, the depth is exhausted,
 *                    or the URL is already recorded in $filename,
 *                  null after a new record has been written.
 */
function crawl_page($url, $depth = 5, $filename)
{
    static $seen = array();
    if (isset($seen[$url]) || $depth === 0) {
        return 3;
    }

    $seen[$url] = true;

    // NOTE(review): TLS peer verification is switched off, so HTTPS responses
    // are not authenticated. Kept as in the original (it allows crawling hosts
    // with self-signed certificates); confirm that this is intended.
    $context = stream_context_create(array(
        'ssl' => array('verify_peer' => false, 'verify_peer_name' => false),
    ));
    $source = file_get_contents($url, false, $context);

    // Bail out BEFORE parsing: DOMDocument::loadHTML() throws a ValueError on
    // an empty source in PHP 8, and "@" does not silence exceptions.
    if ($source === false) {
        return 1;
    }
    if (trim($source) === '') {
        // An empty document has neither links nor a title.
        return 2;
    }

    $dom = new DOMDocument('1.0');
    // Collect libxml's complaints about real-world markup internally instead
    // of emitting them as PHP warnings, then restore the previous setting.
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom->loadHTML($source);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $crawlcount = 0;

    foreach ($dom->getElementsByTagName('a') as $element) {
        $href = crawl_page_resolve_href($element->getAttribute('href'), $url);
        if ($href === null) {
            // Nothing fetchable (empty, fragment, mailto:, javascript:, ...).
            continue;
        }

        $crawlcount++;

        // The per-page limit is read here, as before, so pages without links
        // never touch the global.
        if ($crawlcount > $GLOBALS['maxpagecrawl']) {
            break;
        }

        crawl_page($href, $depth - 1, $filename);
    }

    // When a page repeats a meta tag, the last occurrence wins.
    foreach ($dom->getElementsByTagName('meta') as $meta) {
        $name = strtolower(trim($meta->getAttribute('name')));
        if ($name === 'description') {
            $description = $meta->getAttribute('content');
        } elseif ($name === 'keywords') {
            $keywords = $meta->getAttribute('content');
        }
    }

    if (!isset($description)) {
        $description = "No description...";
    }
    if (!isset($keywords)) {
        $keywords = "No keywords...";
    }

    $titles = $dom->getElementsByTagName('title');
    if ($titles->length === 0) {
        return 2;
    }

    $title = $titles->item(0)->nodeValue;
    if (trim($title) === "" || trim($description) === "") {
        return 2;
    }

    // A missing output file simply means nothing has been recorded yet.
    $existing = file_exists($filename) ? file_get_contents($filename) : '';
    if ($existing === false) {
        $existing = '';
    }

    // Match the complete "URL: ..." line. A plain substring search would treat
    // http://host/page as recorded as soon as http://host/page2 is in the file.
    $recordPattern = '/^URL: ' . preg_quote($url, '/') . '\r?$/m';
    if (preg_match($recordPattern, $existing) === 1) {
        echo "$url exists in " . $filename . PHP_EOL;
        return 3;
    }

    echo "URL: " . $url . "<br />\n"
        . "Title: " . $title . "<br />\n"
        . "Description: " . $description . "<br />\n"
        . "Keywords: " . $keywords . "<br /><br />\n";

    file_put_contents($filename, "URL: " . $url . "\n"
        . "Title: " . $title . "\n"
        . "Description: " . $description . "\n"
        . "Keywords: " . $keywords . "\n\n", FILE_APPEND);

    return null;
}

/**
 * Turn the href of an anchor into an absolute http(s) URL.
 *
 * Internal helper of crawl_page(). Dot segments ("./", "../") are not
 * collapsed; they are passed through unchanged.
 *
 * @param string $href    Raw value of the anchor's href attribute.
 * @param string $baseUrl URL of the page the anchor was found on.
 *
 * @return string|null Absolute URL, or null when the link cannot be followed
 *                     (empty, fragment-only, non-http scheme, or a base URL
 *                     without scheme/host).
 */
function crawl_page_resolve_href($href, $baseUrl)
{
    $href = trim($href);

    // Empty links and pure fragments point back at the current document.
    if ($href === '' || $href[0] === '#') {
        return null;
    }

    // Already absolute.
    if (preg_match('#^https?://#i', $href)) {
        return $href;
    }

    // Any other scheme (mailto:, javascript:, tel:, ftp:, ...) is not crawlable.
    if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $href)) {
        return null;
    }

    $parts = parse_url($baseUrl);
    if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
        return null;
    }

    // Protocol-relative link: only the scheme comes from the base URL.
    if (strpos($href, '//') === 0) {
        return $parts['scheme'] . ':' . $href;
    }

    $origin = $parts['scheme'] . '://';
    if (isset($parts['user']) && isset($parts['pass'])) {
        $origin .= $parts['user'] . ':' . $parts['pass'] . '@';
    }
    $origin .= $parts['host'];
    if (isset($parts['port'])) {
        $origin .= ':' . $parts['port'];
    }

    // Root-relative link: replaces the whole path of the base URL.
    if ($href[0] === '/') {
        return $origin . $href;
    }

    $basePath = isset($parts['path']) ? $parts['path'] : '/';

    // Query-only link: same document, different query string.
    if ($href[0] === '?') {
        return $origin . $basePath . $href;
    }

    // Document-relative link: resolve against the directory of the base path,
    // i.e. everything up to and including its last slash.
    $lastSlash = strrpos($basePath, '/');
    $directory = ($lastSlash === false) ? '/' : substr($basePath, 0, $lastSlash + 1);

    return $origin . $directory . $href;
}

// Command-line entry point. $argc counts the script name itself, so a value
// of 2 means "URL only" and 3 means "URL plus output file". Any other count
// does nothing, and a URL that fails validation is skipped after the
// argument-count line has been printed.
if (isset($argc)) {
    // switch compares loosely, exactly like the == checks it stands in for.
    switch ($argc) {
        case "2":
            // URL only: records go to the file named by $GLOBALS['database'].
            $urlline = $argv[1];
            echo "One argument\r\n";
            if (filter_var(trim($urlline), FILTER_VALIDATE_URL) === FALSE) {
                break;
            }
            echo "Crawling $urlline" . PHP_EOL;
            crawl_page(trim($urlline), $crawl_depth, $GLOBALS['database']);
            break;

        case "3":
            // URL and explicit output file.
            $urlline = $argv[1];
            $fileout = $argv[2];
            echo "Two arguments\r\n";
            if (filter_var(trim($urlline), FILTER_VALIDATE_URL) === FALSE) {
                break;
            }
            echo "Crawling $urlline and saving to $fileout" . PHP_EOL;
            crawl_page(trim($urlline), $crawl_depth, $fileout);
            break;
    }
}
